{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing classes\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import xarray as xr\n",
    "import matplotlib.pyplot as plt\n",
    "import glob    # similar package like 'os'\n",
    "#from datetime import datetime\n",
    "import datetime as dt\n",
    "from dateutil.parser import parse\n",
    "import pickle\n",
    "# from scipy.io import netcdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['/Users/jonguk/Desktop/Pandora_satellite_intercomparison/data/ERA5/UVwind_900to1000hPa_2019to2021_35.5to39.5North_125to129East.nc']\n"
     ]
    }
   ],
   "source": [
    "# This code is to read ERA-5 reanalysis data (0.25 deg grid) and average vertically. \n",
    "# Choose 4 most closest points from the Pandora site and store the u, v wind as pd.Dataframe\n",
    "\n",
    "# Directory of the hdf5 file to process\n",
    "datdir='/Users/jonguk/Desktop/Pandora_satellite_intercomparison/data/ERA5/'\n",
    "savedir='/Users/jonguk/Desktop/Pandora_satellite_intercomparison/data/ERA5/'\n",
    "files=glob.glob(datdir+'*2019*.nc')   # returns full file name with directory\n",
    "\n",
    "#Predetermined lat-lon (surface Pandora station)\n",
    "#Seoul-SNU site: 37.458 N / 126.951 E\n",
    "#UNIST (Ulsan) site: 35.5745 N / 129.1896 E \n",
    "#Pusan site: 35.2353 N / 129.0825 E\n",
    "sitename='Seoul_SNU'\n",
    "#lat_pandora= 30\n",
    "#lon_pandora= 90\n",
    "lat_pandora= 37.458\n",
    "lon_pandora= 126.951\n",
    "print(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KeysView(<xarray.Dataset>\n",
      "Dimensions:    (expver: 2, latitude: 17, level: 5, longitude: 17, time: 7470)\n",
      "Coordinates:\n",
      "  * longitude  (longitude) float32 125.0 125.2 125.5 125.8 ... 128.5 128.8 129.0\n",
      "  * latitude   (latitude) float32 39.5 39.25 39.0 38.75 ... 36.0 35.75 35.5\n",
      "  * level      (level) int32 900 925 950 975 1000\n",
      "  * expver     (expver) int32 1 5\n",
      "  * time       (time) datetime64[ns] 2019-01-01 ... 2021-04-09T08:00:00\n",
      "Data variables:\n",
      "    u          (time, expver, level, latitude, longitude) float32 ...\n",
      "    v          (time, expver, level, latitude, longitude) float32 ...\n",
      "Attributes:\n",
      "    Conventions:  CF-1.6\n",
      "    history:      2021-04-14 04:52:31 GMT by grib_to_netcdf-2.16.0: /opt/ecmw...)\n"
     ]
    }
   ],
   "source": [
    "# Reading the data\n",
    "f=xr.open_dataset(files[0])\n",
    "print(f.keys())\n",
    "\n",
    "lat=f.latitude.values\n",
    "lon=f.longitude.values\n",
    "time=f.time.values\n",
    "\n",
    "u=f.u.sel(expver=1).values \n",
    "v=f.v.sel(expver=1).values\n",
    "\n",
    "time=time.astype('U')\n",
    "\n",
    "# Erasing the rows with invalid points (FEB, 2021~) and add files2\n",
    "validlen=np.sum(np.isfinite(u[:,1,9,9]))\n",
    "u=u[0:validlen,:,:,:]\n",
    "v=v[0:validlen,:,:,:]\n",
    "time=time[0:validlen]\n",
    "\n",
    "# # Below also works\n",
    "# u=u[np.isfinite(u)].reshape(-1,5,17,17)\n",
    "# v=v[np.isfinite(v)].reshape(-1,5,17,17)\n",
    "# print(np.std(ut-u), np.std(vt-v))\n",
    "\n",
    "timelist=[]\n",
    "for i, tt in enumerate(time):\n",
    "    timelist.append(dt.datetime.strptime(tt, '%Y-%m-%dT%H:%M:%S.%f000'))\n",
    "\n",
    "# print(timelist[0].year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KeysView(<xarray.Dataset>\n",
      "Dimensions:    (expver: 2, latitude: 17, level: 5, longitude: 17, time: 1080)\n",
      "Coordinates:\n",
      "  * longitude  (longitude) float32 125.0 125.2 125.5 125.8 ... 128.5 128.8 129.0\n",
      "  * latitude   (latitude) float32 39.5 39.25 39.0 38.75 ... 36.0 35.75 35.5\n",
      "  * level      (level) int32 900 925 950 975 1000\n",
      "  * expver     (expver) int32 1 5\n",
      "  * time       (time) datetime64[ns] 2021-02-01 ... 2021-05-31T08:00:00\n",
      "Data variables:\n",
      "    u          (time, expver, level, latitude, longitude) float32 ...\n",
      "    v          (time, expver, level, latitude, longitude) float32 ...\n",
      "Attributes:\n",
      "    Conventions:  CF-1.6\n",
      "    history:      2021-06-23 02:19:06 GMT by grib_to_netcdf-2.16.0: /opt/ecmw...)\n"
     ]
    }
   ],
   "source": [
    "# This is because the expver. has been changed since FEB 2021 for ERA5 reanalysis U,V wind field\n",
    "# Additional reading of ERA5 UV wind as a sepearate file (Since FEB 2021~) \n",
    "# Warning:: do not run this part (script) more than once, variables might be appended infinitively\n",
    "files2=glob.glob(datdir+'*FEB2021_MAY2021*.nc')   # returns full file name with directory\n",
    "\n",
    "f2=xr.open_dataset(files2[0])\n",
    "print(f2.keys())\n",
    "\n",
    "lat2=f2.latitude.values\n",
    "lon2=f2.longitude.values\n",
    "time2=f2.time.values\n",
    "\n",
    "# Below, should be aware before running this code (Please check the original .nc file in prior)\n",
    "# Code below assumes chunk of expver==1 comes first and than the series of data with expver==5 follows. \n",
    "# If the data is in otherwise, time data mismatches with the u,v data while appending.\n",
    "u2=f2.u.sel(expver=1).values \n",
    "v2=f2.v.sel(expver=1).values\n",
    "validlen=np.sum(np.isfinite(u2[:,1,1,1]))\n",
    "u2=u2[0:validlen,:,:,:]\n",
    "v2=v2[0:validlen,:,:,:]\n",
    "\n",
    "u3=f2.u.sel(expver=5).values \n",
    "v3=f2.v.sel(expver=5).values\n",
    "validlen=np.sum(np.isfinite(u3[:,1,9,9]))\n",
    "u3=u3[-validlen:,:,:,:]\n",
    "v3=v3[-validlen:,:,:,:]\n",
    "\n",
    "time2=time2.astype('U')\n",
    "\n",
    "timelist2=[]\n",
    "for i, tt in enumerate(time2):\n",
    "    timelist2.append(dt.datetime.strptime(tt, '%Y-%m-%dT%H:%M:%S.%f000'))\n",
    "\n",
    "# merging variables from two respective files \n",
    "time=np.append(time, time2, axis=0)\n",
    "u=np.append(u, u2, axis=0)\n",
    "v=np.append(v, v2, axis=0)\n",
    "\n",
    "u=np.append(u, u3, axis=0)\n",
    "v=np.append(v, v3, axis=0)\n",
    "\n",
    "timelist=timelist+timelist2  # list can be added this way\n",
    "# %whos\n",
    "# timelist[7000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KeysView(<xarray.Dataset>\n",
      "Dimensions:    (expver: 2, latitude: 17, level: 5, longitude: 17, time: 1080)\n",
      "Coordinates:\n",
      "  * longitude  (longitude) float32 125.0 125.2 125.5 125.8 ... 128.5 128.8 129.0\n",
      "  * latitude   (latitude) float32 39.5 39.25 39.0 38.75 ... 36.0 35.75 35.5\n",
      "  * level      (level) int32 900 925 950 975 1000\n",
      "  * expver     (expver) int32 1 5\n",
      "  * time       (time) datetime64[ns] 2021-02-01 ... 2021-05-31T08:00:00\n",
      "Data variables:\n",
      "    u          (time, expver, level, latitude, longitude) float32 ...\n",
      "    v          (time, expver, level, latitude, longitude) float32 ...\n",
      "Attributes:\n",
      "    Conventions:  CF-1.6\n",
      "    history:      2021-06-23 02:19:06 GMT by grib_to_netcdf-2.16.0: /opt/ecmw...)\n"
     ]
    }
   ],
   "source": [
    "# # This is because the expver. has been changed since FEB 2021 for ERA5 reanalysis U,V wind field\n",
    "# # Additional reading of ERA5 UV wind as a sepearate file (Since FEB 2021~) \n",
    "# # Warning:: do not run this part (script) more than once, variables might be appended infinitively\n",
    "# files2=glob.glob(datdir+'*FEB*.nc')   # returns full file name with directory\n",
    "\n",
    "# f2=xr.open_dataset(files2[0])\n",
    "# print(f2.keys())\n",
    "\n",
    "# lat2=f2.latitude.values\n",
    "# lon2=f2.longitude.values\n",
    "# time2=f2.time.values\n",
    "\n",
    "# u2=f2.u.values \n",
    "# v2=f2.v.values\n",
    "\n",
    "# time2=time2.astype('U')\n",
    "\n",
    "# timelist2=[]\n",
    "# for i, tt in enumerate(time2):\n",
    "#     timelist2.append(dt.datetime.strptime(tt, '%Y-%m-%dT%H:%M:%S.%f000'))\n",
    "\n",
    "# # merging variables from two respective files \n",
    "# time=np.append(time, time2, axis=0)\n",
    "# u=np.append(u, u2, axis=0)\n",
    "# v=np.append(v, v2, axis=0)\n",
    "# timelist=timelist+timelist2  # list can be added this way\n",
    "# # %whos\n",
    "# # timelist[7000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "37.5 126.75\n",
      "[[0.163072 0.668928]\n",
      " [0.032928 0.135072]]\n",
      "1.0\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>u</th>\n",
       "      <th>v</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019-01-01 00:00:00</td>\n",
       "      <td>6.312792</td>\n",
       "      <td>-3.112986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2019-01-01 01:00:00</td>\n",
       "      <td>6.933334</td>\n",
       "      <td>-2.825669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2019-01-01 02:00:00</td>\n",
       "      <td>7.334250</td>\n",
       "      <td>-2.329716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019-01-01 03:00:00</td>\n",
       "      <td>7.561433</td>\n",
       "      <td>-2.047698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019-01-01 04:00:00</td>\n",
       "      <td>7.849775</td>\n",
       "      <td>-2.150451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7933</th>\n",
       "      <td>2021-05-31 04:00:00</td>\n",
       "      <td>-0.961064</td>\n",
       "      <td>1.621053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7934</th>\n",
       "      <td>2021-05-31 05:00:00</td>\n",
       "      <td>-1.579621</td>\n",
       "      <td>1.663472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7935</th>\n",
       "      <td>2021-05-31 06:00:00</td>\n",
       "      <td>-1.675532</td>\n",
       "      <td>2.065365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7936</th>\n",
       "      <td>2021-05-31 07:00:00</td>\n",
       "      <td>-1.652909</td>\n",
       "      <td>2.540865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7937</th>\n",
       "      <td>2021-05-31 08:00:00</td>\n",
       "      <td>-1.488695</td>\n",
       "      <td>2.982329</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7938 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    time         u         v\n",
       "0    2019-01-01 00:00:00  6.312792 -3.112986\n",
       "1    2019-01-01 01:00:00  6.933334 -2.825669\n",
       "2    2019-01-01 02:00:00  7.334250 -2.329716\n",
       "3    2019-01-01 03:00:00  7.561433 -2.047698\n",
       "4    2019-01-01 04:00:00  7.849775 -2.150451\n",
       "...                  ...       ...       ...\n",
       "7933 2021-05-31 04:00:00 -0.961064  1.621053\n",
       "7934 2021-05-31 05:00:00 -1.579621  1.663472\n",
       "7935 2021-05-31 06:00:00 -1.675532  2.065365\n",
       "7936 2021-05-31 07:00:00 -1.652909  2.540865\n",
       "7937 2021-05-31 08:00:00 -1.488695  2.982329\n",
       "\n",
       "[7938 rows x 3 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Processing the data: making a time series data \n",
    "# 2-D interpolation to the exact Pandora location using four ERA-5 points in the vicinity\n",
    "pe=lat[0]\n",
    "for i, e in enumerate(lat):\n",
    "    if (e-lat_pandora)*(pe-lat_pandora) < 0: \n",
    "        stlatindx=i-1\n",
    "    pe=e\n",
    "\n",
    "pe=lon[0]\n",
    "for i, e in enumerate(lon):\n",
    "    if (e-lon_pandora)*(pe-lon_pandora) < 0:\n",
    "        stlonindx=i-1\n",
    "    pe=e\n",
    "\n",
    "print(lat[stlatindx], lon[stlonindx])\n",
    "\n",
    "#calculating spatial weights based on stlatindx and stlonindx\n",
    "weights=np.zeros([2,2])\n",
    "for i in np.arange(2): #lat\n",
    "    for j in np.arange(2): #lon\n",
    "        weights[i,j]=np.abs(lat[stlatindx+1-i]-lat_pandora)*np.abs(lon[stlonindx+1-j]-lon_pandora)\n",
    "\n",
    "weights=weights/(0.25**2)\n",
    "print(weights)\n",
    "print(np.sum(weights))\n",
    "\n",
    "# calculating hourly wind fields (vertically averaged + 2-D horizontal interpolation to exact Pandora location)\n",
    "# then save it as pd.DataFrame\n",
    "\n",
    "u_new=np.zeros([time.shape[0]])\n",
    "v_new=np.zeros([time.shape[0]])\n",
    "for tindx in np.arange(time.shape[0]):\n",
    "    for i in np.arange(2):  #lat\n",
    "        for j in np.arange(2):  #lon\n",
    "            u_new[tindx]+=np.nanmean(u[tindx,:,stlatindx+i,stlonindx+j])*weights[i,j]\n",
    "            v_new[tindx]+=np.nanmean(v[tindx,:,stlatindx+i,stlonindx+j])*weights[i,j]\n",
    "\n",
    "ERA5wind=pd.DataFrame(timelist, columns=['time'])\n",
    "ERA5wind['u']=pd.Series(u_new)\n",
    "ERA5wind['v']=pd.Series(v_new)\n",
    "ERA5wind\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Saving the pd.DataFrame \"ERA5wind\" as pickle.\n",
    "# ERA5wind.to_pickle(savedir+'ERA5wind_fromJan2019toFEB2021_'+sitename+'.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Saving the pd.DataFrame \"ERA5wind\" as pickle.\n",
    "ERA5wind.to_pickle(savedir+'ERA5wind_fromJan2019toMay2021_'+sitename+'.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
